# LCV Twitter Experiments Analysis File
# This file presents the balance checks

rm(list=ls())
# Uncomment and set working diretory
# setwd("")
load("LCVexp1.rdata")
load("LCVexp2.rdata")
source("LCVsource.r")
library(stargazer)
library(AER)
library(dplyr)
library(nnet)
library(xtable)
#install.packages("devtools")
#devtools::install_github("acoppock/randomizr")
library(randomizr)

## Experiment 1: Balance ####

balance_table <- 
  group_by(LCVexp1, treat) %>%
  summarize(mean_female = mean(fem_dum),
            se_female = se_mean(fem_dum),
            mean_male = mean(male_dum),
            se_male = se_mean(male_dum),
            mean_org = mean(org_dum),
            se_org = se_mean(org_dum),
            mean_unknown= mean(unk_dum),
            se_unknown= se_mean(unk_dum),
            mean_followers = mean(num_followers, na.rm=TRUE),
            se_followers = se_mean(num_followers),
            mean_days = mean(days_on_twitter, na.rm=TRUE),
            se_days = se_mean(days_on_twitter),
            mean_centrality = mean(centrality, na.rm=TRUE),
            se_centrality = se_mean(centrality),
            n = n())

## covariate tests by fisher's method

# using fisher's exact test intead of Chi-square b/c of low cell counts. 
# Justified b/c marginals are fixed by design.
p_account_type <- with(LCVexp1, chisq.test(table(account_type, treat)))$p.value
p_num_followers <- with(LCVexp1, f_tester(x = num_followers,z = treat)) 
p_num_days <- with(LCVexp1, f_tester(x = days_on_twitter,z = treat)) 
p_centrality <- with(LCVexp1, f_tester(x = centrality,z = treat)) 

p_vals <- c(rep("", 7), p_account_type, "", p_num_followers, "", 
            p_num_days, "",p_centrality, "")

balance_table <- t(balance_table)
balance_table <- balance_table[-1, ]
balance_table[seq(2, 14, 2),] <- add_parens(balance_table[seq(2, 14, 2),])
balance_table[seq(1, 13, 2),] <- format_num(balance_table[seq(1, 13, 2),])


names_vec <- c("Account Type: Female", NA,
               "Account Type: Male", NA,
               "Account Type: Organization", NA,
               "Account Type: Unknown", NA,
               "Number of Followers", NA,
               "Days on Twitter", NA,
               "Eigenvector Centrality", NA,
               "N")

balance_table <- cbind(names_vec, balance_table,format_num(p_vals))
balance_table[balance_table=="NA"] <- ""
colnames(balance_table) <- c("", "Public Tweet", "Follower", "Organizer", "p-value")

print.xtable(xtable(balance_table), include.rownames = FALSE, include.colnames=FALSE,only.contents = TRUE,
             file = "exp1balance.tex",
             add.to.row = list(pos=list(8,10,12,14), command=rep("\\cmidrule(r){2-5}",4)), hline.after=c())

# omnibus balance test



fit.u <- multinom(as.character(treat) ~ account_type + num_followers+ days_on_twitter + centrality, data=LCVexp1,verbose=FALSE)
fit.r <- multinom(as.character(treat) ~ 1, data=LCVexp1,verbose=FALSE)
llr.obs <- fit.r$deviance - fit.u$deviance

set.seed(343)
sims <- 1000
llr.sim <- rep(NA, sims)
for (i in 1:sims){
  LCVexp1$Z_sim <- complete_ra(N =  nrow(LCVexp1),m_each = table(LCVexp1$treat) ,condition_names = c("pub", "fol", "org"))
  fit.u <- multinom(Z_sim ~ account_type + num_followers + days_on_twitter + centrality, data=LCVexp1,verbose=FALSE)
  fit.r <- multinom(Z_sim ~ 1, data=LCVexp1,verbose=FALSE)
  llr.sim[i] <- fit.r$deviance - fit.u$deviance
}

hist(llr.sim)
abline(v=llr.obs, col="red")
p_omnibus <- mean(llr.sim>llr.obs)


## Experiment 2: Balance ####

balance_table <- 
  group_by(LCVexp2, treat) %>%
  summarize(mean_female = mean(fem_dum),
            se_female = se_mean(fem_dum),
            mean_male = mean(male_dum),
            se_male = se_mean(male_dum),
            mean_org = mean(org_dum),
            se_org = se_mean(org_dum),
            mean_unknown= mean(unk_dum),
            se_unknown= se_mean(unk_dum),
            mean_followers = mean(num_followers, na.rm=TRUE),
            se_followers = se_mean(num_followers),
            mean_days = mean(days_on_twitter, na.rm=TRUE),
            se_days = se_mean(days_on_twitter),
            mean_centrality = mean(centrality),
            se_centrality = se_mean(centrality),
            n = n())

## covariate tests by fisher's method

# using fisher's exact test intead of Chi-square b/c of low cell counts. 
# Justified b/c marginals are fixed by design.
p_account_type <- 
  group_by(LCVexp2, strata) %>%
  summarise(p_ind=fisher.test(table(treat, account_type))$p.value) %>%
  summarize(p = pchisq(-2 * sum(log(p_ind)), 2 * n(), lower = FALSE) ) %>%
  as.numeric

p_num_followers <- 
  group_by(LCVexp2, strata) %>%
  summarise(p_ind=f_tester(x = num_followers,z = treat)) %>%
  summarize(p = pchisq(-2 * sum(log(p_ind)), 2 * n(), lower = FALSE) )%>%
  as.numeric

p_num_days <- 
  group_by(LCVexp2, strata) %>%
  summarise(p_ind=f_tester(x = days_on_twitter,z = treat)) %>%
  summarize(p = pchisq(-2 * sum(log(p_ind)), 2 * n(), lower = FALSE) )%>%
  as.numeric

p_centrality <- 
  group_by(LCVexp2, strata) %>%
  summarise(p_ind=f_tester(x = centrality,z = treat)) %>%
  summarize(p = pchisq(-2 * sum(log(p_ind)), 2 * n(), lower = FALSE) )%>%
  as.numeric

p_vals <- c(rep("", 7), p_account_type, "", p_num_followers, "", 
            p_num_days, "",p_centrality, "")

balance_table <- t(balance_table)
balance_table <- balance_table[-1, ]
balance_table[seq(2, 14, 2),] <- add_parens(balance_table[seq(2, 14, 2),])
balance_table[seq(1, 13, 2),] <- format_num(balance_table[seq(1, 13, 2),])

names_vec <- c("Account Type: Female", NA,
               "Account Type: Male", NA,
               "Account Type: Organization", NA,
               "Account Type: Unknown", NA,
               "Number of Followers", NA,
               "Number of Tweets", NA,
               "Eigenvector Centrality", NA,
               "N")

balance_table <- cbind(names_vec, balance_table,format_num(p_vals))
balance_table[balance_table=="NA"] <- ""
colnames(balance_table) <- c("", "Public Tweet", "Follower", "Organizer", "p-value")

print.xtable(xtable(balance_table), include.rownames = FALSE, include.colnames=FALSE,only.contents = TRUE,
             file = "/Users/Alex/Documents/Dropbox/Columbia/Collaboration/ai twitter experiments/PB R and R/CGT Twitter RandR Revised Version/exp2balance.tex",
             add.to.row = list(pos=list(8,10,12,14), command=rep("\\cmidrule(r){2-5}",4)), hline.after=c())


# omnibus balance test

fit.u <- multinom(as.character(treat) ~ account_type + num_followers + days_on_twitter + centrality + strata, data=LCVexp2,verbose=FALSE)
fit.r <- multinom(as.character(treat) ~ 1, data=LCVexp2,verbose=FALSE)
llr.obs <- fit.r$deviance - fit.u$deviance

set.seed(343)
sims <- 1000
llr.sim <- rep(NA, sims)
for (i in 1:sims){
  LCVexp2$Z_sim <- strata_ra(stratum = LCVexp2$strata,treat = LCVexp2$treat)
  fit.u <- multinom(Z_sim ~ account_type + num_followers + days_on_twitter + centrality + strata, data=LCVexp2,verbose=FALSE)
  fit.r <- multinom(Z_sim ~ 1, data=LCVexp2,verbose=FALSE)
  llr.sim[i] <- fit.r$deviance - fit.u$deviance
}

hist(llr.sim, breaks=50)
abline(v=llr.obs, col="red")
p_omnibus <- mean(llr.sim>llr.obs)

#### Appendix 3: Heterogeneity

## Heterogeneity by everything else:

het_fit_1 <- lm(signed ~ treat  + centrality_centered + treat:centrality_centered, data=LCVexp1)
het_fit_2 <- lm(signed ~ treat  + num_followers_centered + treat:num_followers_centered, data=LCVexp1)
het_fit_3 <- lm(signed ~ treat  + days_on_twitter_centered + treat:days_on_twitter_centered, data=LCVexp1)
het_fit_4 <- lm(signed ~ treat  + account_type + treat:account_type, data=LCVexp1)
het_fit_5 <- lm(tweeted ~ treat  + centrality_centered + treat:centrality_centered, data=LCVexp1)
het_fit_6 <- lm(tweeted ~ treat  + num_followers_centered + treat:num_followers_centered, data=LCVexp1)
het_fit_7 <- lm(tweeted ~ treat  + days_on_twitter_centered + treat:days_on_twitter_centered, data=LCVexp1)
het_fit_8 <- lm(tweeted ~ treat  + account_type + treat:account_type, data=LCVexp1)


#sink("exp1A_het.tex")
stargazer(het_fit_1, het_fit_2, het_fit_3, het_fit_4,
          het_fit_5, het_fit_6, het_fit_7, het_fit_8,
          coef=makecoefslist(list(het_fit_1, het_fit_2, het_fit_3, het_fit_4,
                                  het_fit_5, het_fit_6, het_fit_7, het_fit_8)),
          se=makerobustseslist(list(het_fit_1, het_fit_2, het_fit_3, het_fit_4,
                                    het_fit_5, het_fit_6, het_fit_7, het_fit_8)),
          style="apsr", column.sep.width = "0pt",  digits=3, digits.extra = 0,
          omit.stat=c("adj.rsq", "f", "ser"),
          model.names=FALSE,
          covariate.labels=c("Treatment: Follower", "Treatment: Organizer", 
                             "Eigenvector Centrality", "Follower X Centrality","Organizer X Centrality",
                             "Number of Followers", "Follower X Followers","Organizer X Followers",
                             "Days on Twitter", "Follower X Days on Twitter","Organizer X Days on Twitter",
                             "Account Type: Male", "Account Type: Organization", "Account Type: Unknown",
                             "Follower X Male","Organizer X Male",
                             "Follower X Organization","Organizer X Organization",
                             "Follower X Unknown","Organizer X Unknown",
                             "Constant"),
          align=TRUE,
          notes=c("Robust standard errors in parentheses.",
                  "Eigenvector centrality, Number of Followers, and Days on Twitter in standard units and centered at zero"),
          notes.append=TRUE,font.size="scriptsize",label="tab: exp1A_het",float=TRUE,
          title="Study 1: Heterogeneous Effects of Treatments")
#sink()

het_fit_1 <- lm(signed ~ treat  + centrality_centered + treat:centrality_centered, data=LCVexp2)
het_fit_2 <- lm(signed ~ treat  + num_followers_centered + treat:num_followers_centered, data=LCVexp2)
het_fit_3 <- lm(signed ~ treat  + days_on_twitter_centered + treat:days_on_twitter_centered, data=LCVexp2)
het_fit_4 <- lm(signed ~ treat  + account_type + treat:account_type, data=LCVexp2)
het_fit_5 <- lm(tweeted ~ treat  + centrality_centered + treat:centrality_centered, data=LCVexp2)
het_fit_6 <- lm(tweeted ~ treat  + num_followers_centered + treat:num_followers_centered, data=LCVexp2)
het_fit_7 <- lm(tweeted ~ treat  + days_on_twitter_centered + treat:days_on_twitter_centered, data=LCVexp2)
het_fit_8 <- lm(tweeted ~ treat  + account_type + treat:account_type, data=LCVexp2)

#sink("exp2A_het.tex")
stargazer(het_fit_1, het_fit_2, het_fit_3, het_fit_4,
          het_fit_5, het_fit_6, het_fit_7, het_fit_8,
          coef=makecoefslist(list(het_fit_1, het_fit_2, het_fit_3, het_fit_4,
                                  het_fit_5, het_fit_6, het_fit_7, het_fit_8)),
          se=makerobustseslist(list(het_fit_1, het_fit_2, het_fit_3, het_fit_4,
                                    het_fit_5, het_fit_6, het_fit_7, het_fit_8)),
          style="apsr", column.sep.width = "0pt",  digits=3, digits.extra = 0,
          omit.stat=c("adj.rsq", "f", "ser"),
          model.names=FALSE,
          covariate.labels=c("Treatment: Follower", "Treatment: Organizer", 
                             "Eigenvector Centrality", "Follower X Centrality","Organizer X Centrality",
                             "Number of Followers", "Follower X Followers","Organizer X Followers",
                             "Days on Twitter", "Follower X Days on Twitter","Organizer X Days on Twitter",
                             "Account Type: Male", "Account Type: Organization", "Account Type: Unknown",
                             "Follower X Male","Organizer X Male",
                             "Follower X Organization","Organizer X Organization",
                             "Follower X Unknown","Organizer X Unknown",
                             "Constant"),
          align=TRUE,
          notes=c("Robust standard errors in parentheses.",
                  "Eigenvector centrality, Number of Followers, and Days on Twitter in standard units and centered at zero"),
          notes.append=TRUE,font.size="scriptsize",label="tab: exp2A_het",float=TRUE,
          title="Study 2: Heterogeneous Effects of Treatments")
#sink()
